-------------------------------------------------------------
-- MSS copyright 2013
--	Filename:  ELASTIC_BUFFER_NRAMB.VHD
-- Author: Alain Zarembowitch / MSS
--	Version: 4
--	Date last modified: 8/26/13
-- Inheritance: 	N/A
--
-- description:  elastic buffer to ease flow-control timing requirements. 
-- Buffer depth is parametric: NRAMB*16Kb, i.e. NRAMB * 2048 words of (8+1) bits.
-- Input and output signals use the same synchronous clock. Cross-domain is not supported here.
-- Limited to 8+1 bit width at this time.... TODO: add other commonly used RAMB widths.
-- usage: SYNC_RESET pulse is mandatory
--
-- Rev 2 5/19/12 AZ
-- Two new features: (a) buffer empty flag and (b) read-ahead by one address
--
-- Rev 3 1/31/13 AZ
-- Prevent spurious burst of data at power up. 
-- New constraint: SYNC_RESET pulse is mandatory
--
-- Rev 4 8/26/13 AZ
-- encapsulated dual port ram for more generic VHDL code
---------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
Library UNISIM;
use UNISIM.vcomponents.all;

-- Elastic buffer (FIFO) with request/strobe flow control on both sides.
-- Every port is synchronous to the single clock CLK.
entity ELASTIC_BUFFER_NRAMB is
	generic (
		NRAMB: integer := 1;
			-- number of 16Kb dual-port RAM blocks instantiated within.
			-- Valid values: 1,2,4,8
			-- Each block is addressed with 11 bits (2048 words), so the buffer depth is NRAMB*2048 words.
		NBITS: integer := 9;
			-- data width. Valid values: 9 (future: 1,2,18,36)
			-- The RAM instances in the architecture are mapped with fixed 9-bit data ports,
			-- so any other value will not elaborate as written.
		READ_AHEAD_1ADDRESS: std_logic := '0'
			-- Some applications need the read pointer to be pre-positioned one address ahead so as to alleviate the 
			-- 1 CLK read delay. However, when the buffer is empty, the memory location sent to the output is meaningless.
			-- '1': the RAM read address is (read pointer + 1). '0': the RAM read address is the read pointer itself.
	);
    port ( 
		--GLOBAL CLOCKS, RESET
		CLK : in std_logic;				-- synchronous clock
		SYNC_RESET: in std_logic;	-- MANDATORY!
			-- synchronous, active high. Resets the write and read pointers (buffer becomes empty).

		--// Input data stream
		DATA_IN: in std_logic_vector((NBITS-1) downto 0);
			-- Read at rising edge of CLK when SAMPLE_CLK_IN  = '1'.
		SAMPLE_CLK_IN: in std_logic;
			-- one CLK wide pulse.
			-- Maximum rate: one SAMPLE_CLK_IN per CLK.
			-- NOTE: writes are not gated by the fill level; the source must honor SAMPLE_CLK_IN_REQ
			-- to avoid overwriting unread data.
		SAMPLE_CLK_IN_REQ: out std_logic;
			-- requests data bits from the module upstream.
			-- = Clear To Send CTS
			-- Registered. '1' while at least 1024 words are free, '0' otherwise and during SYNC_RESET.


		--// Output data stream
		DATA_OUT: out std_logic_vector((NBITS-1) downto 0);
			-- Read at rising edge of CLK when SAMPLE_CLK_OUT  = '1'.
		SAMPLE_CLK_OUT: out std_logic;
			-- one CLK-wide pulse
			-- Registered copy of SAMPLE_CLK_OUT_E, delayed by one CLK.
		SAMPLE_CLK_OUT_E: out std_logic;
			-- 1 CLK earlier sample clk (if needed)
		SAMPLE_CLK_OUT_REQ: in std_logic;
			-- module downstream requests data.
			-- = Clear To Send CTS
			-- Latency between request and SAMPLE_CLK_OUT is one CLK IF data is available.
			-- In detail: SAMPLE_CLK_OUT_E is set at the rising CLK edge where the request is sampled
			-- (and the buffer is not empty); SAMPLE_CLK_OUT follows one CLK later.
		BUFFER_EMPTY: out std_logic
			-- '1' when buffer is empty, '0' otherwise
			-- Combinational function of the write and read pointers.

			);
end entity;

architecture behavioral of ELASTIC_BUFFER_NRAMB is
--------------------------------------------------------
--      COMPONENTS
--------------------------------------------------------
	-- Dual-port RAM wrapper, defined outside this file.
	-- This declaration must stay in step with the BRAM_DP entity it binds to.
	-- In this architecture port A is used for writing only and port B for reading only.
	COMPONENT BRAM_DP
	generic (
		DATA_WIDTHA: integer;	-- port A data width, in bits
		ADDR_WIDTHA: integer;	-- port A address width, in bits
		DATA_WIDTHB: integer;	-- port B data width, in bits
		ADDR_WIDTHB: integer	-- port B address width, in bits
	);	
	PORT(
	    -- Port A
	    CLKA   : in  std_logic;
	    WEA    : in  std_logic;	-- presumably write enable, port A (verify against the BRAM_DP source)
	    ADDRA  : in  std_logic_vector(ADDR_WIDTHA-1 downto 0);
	    DIA   : in  std_logic_vector(DATA_WIDTHA-1 downto 0);
	    DOA  : out std_logic_vector(DATA_WIDTHA-1 downto 0);
	    -- Port B
	    CLKB   : in  std_logic;
	    WEB    : in  std_logic;	-- presumably write enable, port B (verify against the BRAM_DP source)
	    ADDRB  : in  std_logic_vector(ADDR_WIDTHB-1 downto 0);
	    DIB   : in  std_logic_vector(DATA_WIDTHB-1 downto 0);
	    DOB  : out std_logic_vector(DATA_WIDTHB-1 downto 0)
		);
	END COMPONENT;
--------------------------------------------------------
--     SIGNALS
--------------------------------------------------------
-- Suffix _D indicates a one CLK delayed version of the net with the same name
-- Suffix _E indicates an extended precision version of the net with the same name
-- (exception: in SAMPLE_CLK_OUT_E the suffix means "one CLK early")
-- Suffix _N indicates an inverted version of the net with the same name
constant K: integer := 14;	-- write and read pointers, sizes, computed with K bits. 14 -> 128Kbits = 16KBytes
-- Write pointer: address of the next word to be written. Bits 10:0 address a word inside a RAM block.
signal WPTR: std_logic_vector((K-1) downto 0) := (others => '0');
-- Wrap-around mask applied to pointers and sizes; its width depends on NRAMB.
signal ADDR_MASK: std_logic_vector((K-1) downto 0);
-- Index of the RAM block currently selected for writing (upper bits of WPTR).
signal WPTR_MEMINDEX: std_logic_vector(2 downto 0) := (others => '0');
-- Per-block write enable (one bit per RAM block).
signal WEA: std_logic_vector((NRAMB-1) downto 0) := (others => '0');
-- Per-block read data (port B output of each RAM block).
type DOBtype is array(integer range 0 to (NRAMB-1)) of std_logic_vector((NBITS-1) downto 0);
signal DOB: DOBtype;
-- Read pointer. Reset to ADDR_MASK (one address before 0) so that BUF_SIZE computes to 0 when empty;
-- incremented each time a word is read out.
signal RPTR: std_logic_vector((K-1) downto 0) := (others => '0');
-- Address actually presented to the RAM read port: RPTR, or RPTR + 1 when READ_AHEAD_1ADDRESS = '1'.
signal RPTR2: std_logic_vector((K-1) downto 0) := (others => '0');
-- Number of words currently stored: (WPTR - RPTR - 1) modulo the buffer depth.
signal BUF_SIZE: std_logic_vector((K-1) downto 0) := (others => '0');
-- Number of free words: bitwise complement of BUF_SIZE within ADDR_MASK.
signal AVAILABLE_BUF_SPACE:  std_logic_vector((K-1) downto 0) := (others => '1');
-- Registered index of the RAM block being read; selects which DOB entry drives DATA_OUT.
signal RPTR_MEMINDEX: std_logic_vector(2 downto 0) := (others => '0');
-- Internal copy of the SAMPLE_CLK_OUT_E output (so that it can be read back).
signal SAMPLE_CLK_OUT_E_local: std_logic := '0';
-- '0' from power up until the first SYNC_RESET or SAMPLE_CLK_IN, '1' afterwards.
signal RESET_UNTIL_1STSAMPLEIN_N: std_logic := '0';
--------------------------------------------------------
--      IMPLEMENTATION
--------------------------------------------------------
begin

-- Write pointer.
-- WPTR is cleared by SYNC_RESET; otherwise it advances by one address, wrapping at the
-- depth set by ADDR_MASK, for every SAMPLE_CLK_IN pulse.
-- RESET_UNTIL_1STSAMPLEIN_N is latched to '1' by the first SYNC_RESET or SAMPLE_CLK_IN,
-- whichever comes first; this process never clears it.
WPTR_GEN_001: process(CLK)
begin
	if rising_edge(CLK) then
		-- "buffer has been initialized or used" flag
		if (SYNC_RESET = '1') or (SAMPLE_CLK_IN = '1') then
			RESET_UNTIL_1STSAMPLEIN_N <= '1';
		end if;

		-- pointer update, reset has priority over a simultaneous write
		if (SYNC_RESET = '1') then
			WPTR <= (others => '0');
		elsif (SAMPLE_CLK_IN = '1') then
			WPTR <= (WPTR + 1) and ADDR_MASK;
		end if;
	end if;
end process;

-- Index of the RAM block addressed by the write pointer.
-- The block index is held in the WPTR bits just above the 11-bit in-block address;
-- how many of those bits are meaningful depends on the number of blocks (1,2,4, or 8).
with NRAMB select WPTR_MEMINDEX <=
	WPTR(13 downto 11)			when 8,
	"0" & WPTR(12 downto 11)	when 4,
	"00" & WPTR(11 downto 11)	when 2,
	"000"						when others;	-- NRAMB = 1

-- Write-enable decoder: route SAMPLE_CLK_IN to the one RAM block selected by
-- WPTR_MEMINDEX; every other block keeps its write enable at '0'.
WEA_GEN_001: process(WPTR_MEMINDEX, SAMPLE_CLK_IN)
begin
	-- default: no block is written
	WEA <= (others => '0');
	for BLK in 0 to (NRAMB-1) loop
		if (WPTR_MEMINDEX = BLK) then	-- range 0 through 7
			-- later assignment overrides the default for the selected block only
			WEA(BLK) <= SAMPLE_CLK_IN;
		end if;
	end loop;
end process;

-- 1,2,4, or 8 RAM blocks.
-- One BRAM_DP instance per block. All blocks share the same write data and the same
-- 11-bit in-block write/read addresses; the block is selected by WEA(J) on the write side
-- and by the DATA_OUT multiplexer (RPTR_MEMINDEX) on the read side.
RAMB_16_S9_S9_Y: for J in 0 to (NRAMB-1) generate
	
	Inst_BRAM_DP: BRAM_DP 
	GENERIC MAP(
		DATA_WIDTHA => 9,
		ADDR_WIDTHA => 11,	-- 2^11 = 2048 words of 9 bits per block
		DATA_WIDTHB => 9,
		ADDR_WIDTHB => 11
	)
	PORT MAP(
		-- port A: write side
		CLKA => CLK,
		WEA => WEA(J),
		ADDRA => WPTR(10 downto 0),
		DIA => DATA_IN,
		DOA => open,
		-- port B: read side (never written: WEB tied to '0', DIB tied to zeros)
		CLKB => CLK,
		WEB => '0',
		ADDRB => RPTR2(10 downto 0),
		DIB => "000000000",
		DOB => DOB(J)
	);
end generate;

-- RAM read address: the read pointer itself, or one address ahead of it when the
-- READ_AHEAD_1ADDRESS option is enabled.
RPTR2 <= RPTR when (READ_AHEAD_1ADDRESS /= '1') else (RPTR + 1);

-- Pointer wrap-around mask: 11 address bits per RAM block plus 0,1,2 or 3 block-select bits,
-- depending on the number of block RAMs instantiated (NRAMB)
with NRAMB select ADDR_MASK <=
	"00011111111111" when 1,
	"00111111111111" when 2,
	"01111111111111" when 4,
	"11111111111111" when others;	-- NRAMB = 8

-- Fill level. WPTR + not(RPTR) equals WPTR - RPTR - 1 in modulo arithmetic, which is the
-- number of stored words because RPTR rests one address behind the next word to read.
BUF_SIZE <= ADDR_MASK and (WPTR + not(RPTR));
-- Free space is the complement of the fill level within the buffer depth.
AVAILABLE_BUF_SPACE <= ADDR_MASK and (not BUF_SIZE);
BUFFER_EMPTY <= '1' when (BUF_SIZE = 0) else '0';

-- Upstream flow control (registered).
-- SAMPLE_CLK_IN_REQ is raised while the buffer has at least 1K words free, i.e. while any
-- bit of AVAILABLE_BUF_SPACE at position 10 or above is set. It is held low during SYNC_RESET.
TX_CTS_GEN: process(CLK)
begin
	if rising_edge(CLK) then
		-- default: do not request data
		SAMPLE_CLK_IN_REQ <= '0';
		if (SYNC_RESET /= '1') and (AVAILABLE_BUF_SPACE(AVAILABLE_BUF_SPACE'left downto 10) /= 0) then
			SAMPLE_CLK_IN_REQ <= '1';
		end if;
	end if;
end process;

-- read pointer management
-- On each rising CLK edge (outside of reset):
--  * SAMPLE_CLK_OUT is loaded with the previous SAMPLE_CLK_OUT_E_local (one CLK delay),
--  * RPTR_MEMINDEX registers which RAM block the current RAM read address (RPTR2) points to,
--    so that it lines up with the one-CLK-late RAM output used by the DATA_OUT multiplexer,
--  * RPTR advances by one word, and SAMPLE_CLK_OUT_E_local is set, when the buffer is not
--    empty and the downstream module requests data.
-- SYNC_RESET places RPTR at ADDR_MASK (one address before 0, which makes BUF_SIZE = 0)
-- and clears both output strobes.
RPTR_GEN_001: process(CLK)
begin
	if rising_edge(CLK) then
		if(SYNC_RESET = '1') then
			RPTR <= ADDR_MASK;
			SAMPLE_CLK_OUT_E_local <= '0';
			-- Also clear the delayed strobe: without this it would keep its previous value
			-- (possibly '1') for the whole duration of the reset, and be undefined in simulation
			-- until the first clock edge after reset.
			SAMPLE_CLK_OUT <= '0';
		else
			SAMPLE_CLK_OUT <= SAMPLE_CLK_OUT_E_local;
			
			-- select which RAMB to read from
			-- Keep only the block-select bits of the read address, depending on the memory depth (1,2,4, or 8 RAMblocks)
			if(NRAMB = 1) then
				RPTR_MEMINDEX <= "000";
			elsif (NRAMB = 2) then
				RPTR_MEMINDEX <= "00" & RPTR2(11 downto 11);
			elsif (NRAMB = 4) then
				RPTR_MEMINDEX <= "0" & RPTR2(12 downto 11);
			else
			--elsif (NRAMB = 8) then
				RPTR_MEMINDEX <= RPTR2(13 downto 11);
			end if;
				
			if(RESET_UNTIL_1STSAMPLEIN_N = '0') then
				-- initialize read pointer at power up until first input sample (i.e. don't wait for SYNC_RESET which could be delayed,
				-- resulting in a spurious burst of zero output samples)
				RPTR <= ADDR_MASK;
			elsif(BUF_SIZE > 0) and (SAMPLE_CLK_OUT_REQ = '1') then
				-- two conditions must be met to read data from the RAMB:
				-- there is at least one byte in the RAMB and
				-- destination requests more bits 
				RPTR <= (RPTR + 1) and ADDR_MASK;
				SAMPLE_CLK_OUT_E_local <= '1';
			else
				-- nothing to read
				SAMPLE_CLK_OUT_E_local <= '0';
			end if;
		end if;
	end if;
end process;

-- Early output strobe: direct copy of the internal register.
SAMPLE_CLK_OUT_E <= SAMPLE_CLK_OUT_E_local;
-- Output multiplexer: forward the read data of the RAM block selected by the registered block index.
DATA_OUT <= DOB(conv_integer(RPTR_MEMINDEX));
end architecture;
